# Loading Packages
library(data.table)
library(lubridate)
library(tidyverse)
library(esquisse)
library(plyr)
library(ggplot2)
library(cowplot)
library(naniar) #for NA exploration
library(sp) #spatial data
library(ggmap) #for map
library(osmdata) #openstreetmap
library(reshape2)
library(plotly)
# Read the laptop sales CSV with data.table::fread (path is relative to the
# working directory).
Laptop_Sales_Data <- fread("DATA/LaptopSales_red.csv")
# Sanity check: the data.table syntax used later in the script requires a
# data.table, not a plain data.frame.
is.data.table(Laptop_Sales_Data)
## [1] TRUE
# Per-column summary; the recorded output below shows NA counts for
# Retail.Price, store.X and store.Y.
summary(Laptop_Sales_Data)
## V1 Date Configuration Customer.Postcode
## Min. : 2 Length:148786 Min. : 1.0 Length:148786
## 1st Qu.: 74275 Class :character 1st Qu.:192.0 Class :character
## Median :148450 Mode :character Median :347.0 Mode :character
## Mean :148742 Mean :379.7
## 3rd Qu.:223162 3rd Qu.:576.0
## Max. :297572 Max. :864.0
##
## Store.Postcode Retail.Price Screen.Size..Inches. Battery.Life..Hours.
## Length:148786 Min. :168.0 Min. :15.00 Min. :4.000
## Class :character 1st Qu.:440.0 1st Qu.:15.00 1st Qu.:4.000
## Mode :character Median :500.0 Median :15.00 Median :5.000
## Mean :508.1 Mean :15.81 Mean :4.973
## 3rd Qu.:575.0 3rd Qu.:17.00 3rd Qu.:6.000
## Max. :890.0 Max. :17.00 Max. :6.000
## NA's :6656
## RAM..GB. Processor.Speeds..GHz. Integrated.Wireless. HD.Size..GB.
## Min. :1.000 Min. :1.50 Length:148786 Min. : 40.0
## 1st Qu.:1.000 1st Qu.:1.50 Class :character 1st Qu.: 40.0
## Median :2.000 Median :2.00 Mode :character Median : 80.0
## Mean :2.101 Mean :1.93 Mean :132.2
## 3rd Qu.:2.000 3rd Qu.:2.40 3rd Qu.:120.0
## Max. :4.000 Max. :2.40 Max. :300.0
##
## Bundled.Applications. customer.X customer.Y store.X
## Length:148786 Min. :512253 Min. :164886 Min. :517917
## Class :character 1st Qu.:529098 1st Qu.:178716 1st Qu.:528924
## Mode :character Median :530928 Median :181083 Median :529902
## Mean :530748 Mean :179890 Mean :530644
## 3rd Qu.:533076 3rd Qu.:182060 3rd Qu.:534057
## Max. :549065 Max. :199846 Max. :541428
## NA's :85
## store.Y
## Min. :168302
## 1st Qu.:178440
## Median :179641
## Mean :179757
## 3rd Qu.:181567
## Max. :190628
## NA's :85
# Column types and sample values; note that Date is still a character column
# here and is parsed with mdy_hm() when the subsets are built.
str(Laptop_Sales_Data)
## Classes 'data.table' and 'data.frame': 148786 obs. of 17 variables:
## $ V1 : int 171289 38634 260048 166045 243280 118859 249957 198058 198850 267007 ...
## $ Date : chr "9/20/2008 2:49" "5/30/2008 9:52" "12/10/2008 9:26" "9/15/2008 9:41" ...
## $ Configuration : int 528 307 235 168 517 738 301 301 479 472 ...
## $ Customer.Postcode : chr "NW5 1SP" "N6 6BU" "CR0 2BW" "WC2H 9PS" ...
## $ Store.Postcode : chr "N3 1DH" "N3 1DH" "CR7 8LE" "SW1P 3AU" ...
## $ Retail.Price : int 413 515 315 NA 580 535 455 465 600 392 ...
## $ Screen.Size..Inches. : int 17 15 15 15 17 17 15 15 17 17 ...
## $ Battery.Life..Hours. : int 4 6 5 5 4 6 6 6 4 4 ...
## $ RAM..GB. : int 2 1 2 1 2 1 1 1 1 1 ...
## $ Processor.Speeds..GHz.: num 2.4 2 2.4 2 2.4 2 1.5 1.5 2.4 2.4 ...
## $ Integrated.Wireless. : chr "No" "Yes" "No" "Yes" ...
## $ HD.Size..GB. : int 300 80 80 300 120 40 120 120 300 300 ...
## $ Bundled.Applications. : chr "No" "Yes" "Yes" "No" ...
## $ customer.X : int 528771 528281 532781 530190 537350 532498 533130 529390 533998 532498 ...
## $ customer.Y : int 186041 187336 166444 181139 169306 168334 182489 181270 168421 168334 ...
## $ store.X : int 525109 525109 532714 529902 528739 528739 534057 528924 528739 532714 ...
## $ store.Y : int 190628 190628 168302 179641 173080 173080 179682 178440 173080 168302 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Share of missing values per variable.
gg_miss_var(Laptop_Sales_Data, show_pct = TRUE)

#### Price/date subset with incomplete rows removed
# Keep only the price and the timestamp, parse the "m/d/Y H:M" strings into
# date-times, then drop every row where either value is NA.
Retail_Price_and_Dates <- na.omit(
  Laptop_Sales_Data[, .(Retail.Price, Date = mdy_hm(Date))]
)
#### Histogram of the Retail Price of Computer In 2008
# Distribution of individual sale prices (30 bins), rendered interactively.
# The year label is 2008: the sale timestamps in the data are from 2008
# (e.g. "9/20/2008 2:49"), not 2018.
ggplotly(
  ggplot(Retail_Price_and_Dates) +
    aes(x = Retail.Price) +
    geom_histogram(bins = 30L, fill = "#1c6155") +
    labs(
      x = "Price",
      y = "Frequency",
      title = "Histogram of the Retail Price of Computer",
      subtitle = "In 2008"
    ) +
    theme_minimal()
)
#### Boxplot of the Retail Price of Computer In 2008
# Single boxplot over all sale prices; x = "" gives one unlabeled group.
# The year label is 2008 to match the sale timestamps in the data.
ggplotly(
  ggplot(Retail_Price_and_Dates) +
    aes(x = "", y = Retail.Price) +
    geom_boxplot(fill = "#1c6155") +
    labs(
      y = "Price",
      title = "Boxplot of the Retail Price of Computer",
      subtitle = "In 2008"
    ) +
    theme_minimal()
)
# Actual price
# "Actual" price = the price(s) of the sale(s) carrying the latest timestamp.
Max_Date_Retail <- max(Retail_Price_and_Dates$Date)
Actual_Price <- Retail_Price_and_Dates[Date %in% Max_Date_Retail, ]
# Build the message from every row sharing that timestamp instead of
# hard-coding rows 1 and 2: with a single matching sale the old code printed
# "NA USD", and with more than two it silently dropped the rest.
print(paste(
  "Last Recorded Prices are",
  paste(Actual_Price$Retail.Price, "USD", collapse = " and "),
  "on the same Day"
))
## [1] "Last Recorded Prices are 406 USD and 530 USD on the same Day"
# Mean retail price per calendar month, week and day.
# Both output columns are named explicitly in the data.table call rather than
# relying on auto-generated names followed by a positional rename. The grouping
# column must be called `floor_date`, because the line plots below map
# x = floor_date.
Retail_Price_and_Dates_Month <- Retail_Price_and_Dates[
  , .(Mean_Retail_Price = mean(Retail.Price)),
  by = .(floor_date = floor_date(Date, unit = "month"))
]
Retail_Price_and_Dates_Week <- Retail_Price_and_Dates[
  , .(Mean_Retail_Price = mean(Retail.Price)),
  by = .(floor_date = floor_date(Date, unit = "week"))
]
Retail_Price_and_Dates_Day <- Retail_Price_and_Dates[
  , .(Mean_Retail_Price = mean(Retail.Price)),
  by = .(floor_date = floor_date(Date, unit = "day"))
]
# Retail Price By Month
# Line chart of the monthly mean price. The title year is 2008 to match the
# sale timestamps in the data (they are not from 2018).
ggplotly(
  ggplot(Retail_Price_and_Dates_Month) +
    aes(x = floor_date, y = Mean_Retail_Price) +
    geom_line(size = 1.1, colour = "#112446") +
    labs(
      x = "Month",
      y = "Price",
      title = "Retail Price of Computer in 2008",
      subtitle = "Aggregated by Month"
    ) +
    theme_classic()
)
# Retail Price By Week
# Line chart of the weekly mean price. The title year is 2008 to match the
# sale timestamps in the data (they are not from 2018).
ggplotly(
  ggplot(Retail_Price_and_Dates_Week) +
    aes(x = floor_date, y = Mean_Retail_Price) +
    geom_line(size = 0.4, colour = "#112446") +
    labs(
      x = "Week",
      y = "Price",
      title = "Retail Price of Computer in 2008",
      subtitle = "Aggregated by Week"
    ) +
    theme_classic()
)
# Retail Price By Day
# Line chart of the daily mean price. The title year is 2008 to match the
# sale timestamps in the data (they are not from 2018).
ggplotly(
  ggplot(Retail_Price_and_Dates_Day) +
    aes(x = floor_date, y = Mean_Retail_Price) +
    geom_line(size = 0.2, colour = "#112446") +
    labs(
      x = "Day",
      y = "Price",
      title = "Retail Price of Computer in 2008",
      subtitle = "Aggregated by Day"
    ) +
    theme_classic()
)
#### Subsets for the per-store and per-configuration views (NA rows dropped)
# Price, store postcode and parsed sale timestamp; rows with any NA removed.
Retail_Price_Outlets_Date <- na.omit(
  Laptop_Sales_Data[, .(Retail.Price, Store.Postcode, Date = mdy_hm(Date))]
)

# Price together with the configuration id and every hardware/software
# attribute column; rows with any NA removed.
Retail_Price_Configuration <- na.omit(
  Laptop_Sales_Data[
    , .SD,
    .SDcols = c(
      "Retail.Price",
      "Configuration",
      "Screen.Size..Inches.",
      "Battery.Life..Hours.",
      "RAM..GB.",
      "Processor.Speeds..GHz.",
      "Integrated.Wireless.",
      "HD.Size..GB.",
      "Bundled.Applications."
    )
  ]
)
#### Price Decomposed by Retail Outlets
#### Boxplot Across Retail Outlets
# One boxplot of sale prices per store postcode; x labels are rotated 90
# degrees so the postcodes do not overlap. The subtitle year is 2008 to match
# the sale timestamps in the data.
ggplotly(
  ggplot(Retail_Price_Outlets_Date) +
    aes(x = Store.Postcode, y = Retail.Price) +
    geom_boxplot(fill = "#112446") +
    labs(
      x = "Stores Postcode",
      y = "Price",
      title = "Boxplot Of The Retail Price Across Stores",
      subtitle = "In 2008"
    ) +
    theme_classic() +
    scale_x_discrete(guide = guide_axis(n.dodge = 1)) +
    theme(axis.text.x = element_text(size = rel(1), angle = 90))
)
#### Retail Price Across Stores During 2008
# Add the month bucket to the per-store subset. `:=` modifies
# Retail_Price_Outlets_Date in place; this is kept as its own statement so the
# side effect is visible rather than hidden inside a chain.
Retail_Price_Outlets_Date[, Floor.Date := floor_date(Date, unit = "month")]
# Mean price per store and month. The result column is named directly in `j`;
# the previous `c(Mean_Price = ...)` name was discarded by data.table and the
# column then had to be renamed by position.
Retail_Price_Outlets_Date_Month <- Retail_Price_Outlets_Date[
  , .(Mean_Retail_Price = mean(Retail.Price)),
  by = .(Store.Postcode, Floor.Date)
]
#### Plot of the Monthly Retail Price per Stores
# One line per store postcode showing its monthly mean price. The subtitle
# year is 2008 to match the sale timestamps in the data.
ggplotly(
  ggplot(Retail_Price_Outlets_Date_Month) +
    aes(x = Floor.Date, y = Mean_Retail_Price, colour = Store.Postcode) +
    geom_line(size = 0.5) +
    scale_color_hue(direction = 1) +
    labs(
      x = "Month",
      y = "Price",
      title = "Retail Price Across Months and Grouped by Stores",
      subtitle = "In 2008"
    ) +
    theme_classic()
)
#### Plot Of The Retail Price per Configuration
# Scatter of sale price against configuration id, coloured by battery life on
# a continuous gradient. The subtitle year is 2008 to match the sale
# timestamps in the data.
ggplotly(
  ggplot(Retail_Price_Configuration) +
    aes(x = Configuration, y = Retail.Price, colour = Battery.Life..Hours.) +
    geom_point(shape = "circle", size = 0.8) +
    scale_color_gradient() +
    labs(
      y = "Retail Price",
      title = "Retail Price and Configuration ",
      subtitle = "In 2008"
    ) +
    theme_classic()
)